import numpy as np

def robust_fit(B_hat, latent_dim_max=10, lambda_reg=0.1, allow_individual_alpha=True,
               energy_threshold=0.99):
    """Fit ``sigmoid(mu @ mu.T + alpha_i + alpha_j)`` to a matrix of probabilities.

    The input is mapped to the logit domain, symmetrized, and eigendecomposed.
    Eigenvalues are soft-thresholded by ``lambda_reg`` (negative ones become
    zero), and the leading ``d`` components form the latent positions ``mu``.
    The offsets ``alpha`` are then chosen from the diagonal of the logit
    matrix.

    NOTE(review): the eigendecomposition is applied to the full logit matrix,
    i.e. the offsets are not removed before ``mu`` is extracted; ``alpha`` only
    absorbs what is left on the diagonal.

    Parameters
    ----------
    B_hat : array-like, shape (K, K)
        Square matrix with entries in [0, 1]. Nested lists are accepted.
    latent_dim_max : int
        Upper bound on the number of latent dimensions.
    lambda_reg : float
        Amount subtracted from every eigenvalue before clipping at zero.
    allow_individual_alpha : bool
        If True, each row gets its own offset, matching the diagonal logits
        exactly. If False, a single shared offset (the mean of those per-row
        values) is used.
    energy_threshold : float
        Fraction of the total shrunk-eigenvalue mass that the retained
        components must reach. Defaults to 0.99.

    Returns
    -------
    mu : ndarray, shape (K, d)
        Latent positions (columns are defined up to sign).
    alpha : ndarray, shape (K,)
        Offsets.
    B_rec : ndarray, shape (K, K)
        Reconstructed probability matrix.
    err : float
        Frobenius norm of ``B_rec - B_hat`` divided by the Frobenius norm of
        ``B_hat`` (not finite when ``B_hat`` is all zeros).
    d : int
        Number of latent dimensions used, with ``1 <= d <= K``.

    Raises
    ------
    ValueError
        If ``B_hat`` is not a non-empty square 2-D matrix, or has entries
        outside [0, 1] (including NaN).
    """
    B = np.asarray(B_hat, dtype=float)
    if B.ndim != 2 or B.shape[0] != B.shape[1] or B.shape[0] == 0:
        raise ValueError("B_hat must be a non-empty square 2-D matrix")
    # Written as a positive test so that NaN entries are rejected as well.
    if not np.all((B >= 0.0) & (B <= 1.0)):
        raise ValueError("B_hat entries must lie in [0, 1]")
    K = B.shape[0]

    # Logit transform; eps keeps the logs finite at exactly 0 or 1.
    eps = 1e-12
    logits = np.log(B + eps) - np.log(1 - B + eps)
    logits = (logits + logits.T) / 2

    # eigh returns ascending eigenvalues; reorder to descending.
    vals, vecs = np.linalg.eigh(logits)
    order = np.argsort(vals)[::-1]
    vals = vals[order]
    vecs = vecs[:, order]

    # Soft-threshold: also discards every negative eigenvalue.
    vals_shrunk = np.maximum(vals - lambda_reg, 0.0)

    # Smallest d whose cumulative share of the shrunk spectrum reaches the
    # threshold. If nothing survives shrinkage there is no energy to
    # normalize by, so fall back to one (all-zero) component instead of
    # computing 0/0.
    total_energy = vals_shrunk.sum()
    if total_energy > 0.0:
        cum = np.cumsum(vals_shrunk)
        d = int(np.searchsorted(cum / total_energy, energy_threshold)) + 1
    else:
        d = 1
    d = int(max(1, min(latent_dim_max, d, K)))

    top_vals = vals_shrunk[:d]
    top_vecs = vecs[:, :d]
    mu = top_vecs * np.sqrt(top_vals[None, :])

    # Per-row offset that makes the reconstructed diagonal logit equal the
    # observed one: |mu_i|^2 + 2 * alpha_i = logits[i, i].
    alpha_rows = (np.diag(logits) - np.sum(mu**2, axis=1)) / 2.0
    if allow_individual_alpha:
        alpha = alpha_rows
    else:
        # Shared offset: the mean of the per-row values above.
        alpha = np.mean(alpha_rows) * np.ones(K)

    B_rec = 1 / (1 + np.exp(-(mu @ mu.T + alpha[:, None] + alpha[None, :])))
    err = np.linalg.norm(B_rec - B) / np.linalg.norm(B)
    return mu, alpha, B_rec, err, d

# Example input: a 4x4 symmetric matrix, fitted with per-row offsets.
B_hat = np.array(
    [
        [0.095913, 0.042598, 0.014004, 0.00215],
        [0.042598, 0.093814, 0.021969, 0.002542],
        [0.014004, 0.021969, 0.095596, 0.005677],
        [0.00215, 0.002542, 0.005677, 0.138492],
    ]
)
mu, alpha, B_rec, err, d = robust_fit(
    B_hat,
    latent_dim_max=10,
    lambda_reg=0.1,
    allow_individual_alpha=True,
)